Time to Restore - 2024-2025 data

Author

Erin Zylstra

Published

September 19, 2025

Identifying priority species

Used list of priority species based on: TimeToRestore_AllPrioritySpecies_2024 google sheet (extracted on 2 April 2025) and made the following changes:

  • Corrected the spelling of one genus (Vernonia) and one species (cespitosa)

  • Updated scientific and common names based on ITIS for mistflower (Conoclinium), basket-flower (Centaurea), and Lantana; will use the common name listed in ITIS/NPN data base for these species imoving forward.

  • Using the common name listed in NPN database for Sambucus nigra and Passiflora incarnata, which were each listed twice in the googlesheet under two different common names.

Code
# Load list of priority species
spp_list <- read.csv("data/ttr-priorityspecies-20250402.csv", 
                     na.strings = c(NA, ""))
spp_list <- spp_list %>%
  mutate(across(c(common_name, scientific_name), str_trim)) %>%
  # Edit spelling of one genus (Vernonia) and one species (cespitosa)
  # Update species based on ITIS for mistflower (Conoclinium), 
  # basket-flower (Centaurea), and Lantana
  mutate(scientific_name = case_when(
    scientific_name == "Oenothera caespitosa" ~ "Oenothera cespitosa",
    scientific_name == "Veronia gigantea" ~ "Vernonia gigantea",
    scientific_name == "Conoclinium greggii" ~ "Conoclinium dissectum",
    scientific_name == "Centaurea americana" ~ "Plectocephalus americanus",
    scientific_name == "Lantana urticoides" ~ "Lantana horrida", 
    .default = scientific_name
  )) %>%
  rename(ttr_common_name = common_name)

# Load information about NN species
nn_spp <- npn_species() %>% data.frame()
nn_spp <- nn_spp %>%
  filter(kingdom == "Plantae") %>%
  select(species_id, common_name, genus, species, functional_type) %>%
  mutate(scientific_name = paste(genus, species))

# Find NN info based on scientific name of priority species (inconsistent 
# capitalization in priority list and a duplicate in NN database [Canada goldenrod])
spp_list <- spp_list %>%
  left_join(nn_spp, by = "scientific_name")
# Check that all priority species have a match in NN database
  # filter(spp_list, is.na(species_id))
# Are there any duplicates? Yes
  # count(spp_list, species_id) %>% filter(n > 1)
  # filter(spp_list, species_id == 90)
  # In priority list, Sambucus nigra listed as both black and common elderberry
  # filter(spp_list, species_id == 182)
  # In priority list, Passiflora incarnata listed as both purple passionflower and Maypop

# Remove entries for TTR priority species whose common name doesn't match NPN
spp_list <- spp_list %>%
  filter(!ttr_common_name %in% c("Maypop", "Common elderberry"))

This left us with 53 priority species.

Downloading status-intensity data

Used rnpn package to download status and intensity data for priority species in Louisiana, New Mexico, Oklahoma, and Texas from 1 October 2023 through the present day. Downloaded observations of 4 phenophases: flowers or flower buds (flower), open flowers, fruits, and ripe fruits. Since leaves on milkweed plants may be important for monarch eggs and catepillars, we also downloaded observations of the leaves phenophase for the 8 species of milkweed on the priority list. After downloading the data, we appended information about intensity categories.

Code
# First, check that all species have these 4 phenophases
phenophases_byspp <- npn_phenophases_by_species(
  species_ids = c(spp_list$species_id),
  date = "2025-01-01"
) %>% data.frame()
phenophases_byspp %>%
  group_by(species_id, species_name) %>%
  summarize(p500 = ifelse(500 %in% phenophase_id, 1, 0),
            p501 = ifelse(501 %in% phenophase_id, 1, 0),
            p516 = ifelse(516 %in% phenophase_id, 1, 0),
            p390 = ifelse(390 %in% phenophase_id, 1, 0),
            .groups = "keep") %>%
  rowwise() %>%
  filter(sum(c_across(p500:p390)) < 4)
# All species use these 4 phenophases

# What phenophases do the milkweeds use?
milkweed_phps <- phenophases_byspp %>%
  filter(str_detect(species_name, "milkweed")) %>%
  select(species_id, species_name, pheno_class_id, phenophase_id, phenophase_name) %>%
  arrange(species_name, pheno_class_id, phenophase_id)
# Since leaves may be important for monarch eggs and catepillars, we may also 
# want to include:
# 488 = Leaves (for milkweeds only)

# Download and format (or load existing) NPN data for priority plant species --#
  
# We want observations in 4 states (LA, NM, OK, TX)
# Focus on 2025 data, but also download 2024 data (for fruits and/or comparison)

phenophases <- c(500, 501, 516, 390)
states4 <- c("LA", "NM", "OK", "TX")
  # Note: we could be missing observations in the four states if we use
  # the states argument in the download function because sometimes the state 
  # field is missing or incorrect. Best to download all records for species and
  # fixing/imputing state, and then filtering by state.

data_filename <- "data/ttr-data-2024sep2025.csv"

if (!file.exists(data_filename) | update_data == TRUE) {
  
  # Download flowering, fruiting data for 2024-2025 (including 2023 so that 
  # we can calculate individual phenometrics for 2024, if needed)
  status_dl <- npn_download_status_data(
    request_source = "erinz",
    years = 2023:2025,
    species_ids = spp_list$species_id,
    phenophase_ids= phenophases,
    # states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  status_dl <- data.frame(status_dl)
  
  # Download leafing data for milkweeds in 2024-2025
  milkweeds <- spp_list %>%
    filter(str_detect(common_name, "milkweed")) %>%
    pull(species_id)
  status_mwleaf_dl <- npn_download_status_data(
    request_source = "erinz",
    years = 2023:2025,
    species_ids = milkweeds,
    phenophase_ids= 488,
    # states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  status_mwleaf_dl <- data.frame(status_mwleaf_dl)
  
  # Combine everything and format
  status_df <- rbind(status_dl, status_mwleaf_dl) %>%
    mutate(obsdate = ymd(observation_date),
           yr = year(obsdate),
           php = case_when(
             phenophase_id == 500 ~ "flower",
             phenophase_id == 501 ~ "open flower",
             phenophase_id == 516 ~ "fruit",
             phenophase_id == 390 ~ "ripe fruit",
             phenophase_id == 488 ~ "leaves")) %>%
    filter(obsdate >= "2023-10-01") %>%
    select(-c(update_datetime, elevation_in_meters, genus, species, kingdom,
              phenophase_description, abundance_value, observation_date)) %>%
    rename(person_id = observedby_person_id,
           func_type = species_functional_type,
           lat = latitude,
           lon = longitude)
  
  # Some observations missing state ID. Will use a shapefile to get an assigned
  # state for each site and use that moving forward
  state_fill <- status_df %>%
    select(site_id, lon, lat, state) %>%
    distinct()
  state_fillv <- vect(state_fill, 
                      geom = c("lon", "lat"), 
                      crs = "epsg:4326")
  state_new <- terra::extract(states, state_fillv)
  state_fill <- cbind(state_fill, state_new = state_new$STUSPS)
  # check:
  # count(state_fill, state, state_new) %>%
  #   mutate(same = ifelse(state == state_new, 1, 0)) %>%
  #   arrange(same)
  
  # Attach new state labels and exclude observations that aren't in the 4 states
  status_df <- status_df %>%
    left_join(select(state_fill, site_id, state_new), by = "site_id") %>%
    select(-state) %>%
    rename(state = state_new) %>%
    filter(!is.na(state) & state %in% states4)
  
  # Write to file
  write.csv(status_df, data_filename, row.names = FALSE)
  # Remove objects
  rm(status_df, status_dl, status_mwleaf_dl)
}

status <- read.csv(data_filename) %>%
  mutate(obsdate = ymd(obsdate),
         wy = ifelse(obsdate >= "2024-10-01", 2025, 2024))

For the purposes of this report, we focused on observations submitted in the current water year, from 1 October 2024 - 18 September 2025, the last date that observations were available. Throughout this report, we often compared current water-year data with data collected during the previous water year (October 2023 - September 2024), while keeping in mind that we are not entirely through the current water year.

Code
# Download information about intensity categories
ic <- npn_abundance_categories() %>% data.frame()
ic <- ic %>%
  rename(intensity_category_id = category_id, 
         intensity_value_id = value_id,
         intensity_name = category_name,
         intensity_value = value_name) %>%
  select(-c(category_description, value_description))

# Extract just those categories that appear in status data and format:
ic_subset <- ic %>%
  filter(intensity_category_id %in% unique(status$intensity_category_id)) %>%
  mutate(value1 = NA,
         value2 = NA,
         intensity_type = case_when(
           str_detect(intensity_value, "%") ~ "percent",
           str_detect(intensity_value, "[0-9]") ~ "number",
           .default = "qualitative"
         ))
val12 <- which(colnames(ic_subset) %in% c("value1", "value2"))
for (i in 1:nrow(ic_subset)) {
  if (str_detect(ic_subset$intensity_value[i], " to ")) {
    ic_subset[i, val12] <- str_split_fixed(ic_subset$intensity_value[i], " to ", 2)
    ic_subset[i, val12] <- as.numeric(str_remove(ic_subset[i, val12], ","))
  } else if (str_detect(ic_subset$intensity_value[i], "-")) {
    ic_subset[i, val12] <- str_split_fixed(ic_subset$intensity_value[i], "-", 2)
    ic_subset[i, val12[2]] <- str_remove(ic_subset[i, val12[2]], "%")
  } else if (str_detect(ic_subset$intensity_value[i], "% or more")) {
    ic_subset[i, val12] <- str_remove(ic_subset$intensity_value[i], "% or more")
  } else if (str_detect(ic_subset$intensity_value[i], "Less than ")) {
    ic_subset[i, val12[1]] <- 0
    ic_subset[i, val12[2]] <- str_remove(ic_subset$intensity_value[i], "Less than ")
    ic_subset[i, val12[2]] <- str_remove(ic_subset[i, val12[2]], "%")
  } else if (str_detect(ic_subset$intensity_value[i], "More than ")) {
    ic_subset[i, val12] <- str_remove(ic_subset$intensity_value[i], "More than ")
    ic_subset[i, val12[1]] <- as.numeric(str_remove(ic_subset[i, val12[1]], ",")) + 1
    ic_subset[i, val12[2]] <- as.numeric(str_remove(ic_subset[i, val12[2]], ",")) + 1
  }
}
ic_subset <- ic_subset %>%
  mutate_at(c("value1", "value2"), as.numeric)

# Assigning a middle-ish value for each range (keeping it to nice numbers like 
# 5, 50, 500, and 5000)
ic_subset <- ic_subset %>%
  mutate(mag = nchar(value1) - 1) %>%
  mutate(value = case_when(
    value1 == value2 ~ round(value1),
    intensity_type == "number" & value1 == 0 ~ 1,
    intensity_type == "number" & value1 != 0 ~ 
      round_any(rowMeans(across(value1:value2)), 5 * (10 ^ mag)),
    intensity_type == "percent" ~ round(rowMeans(across(value1:value2))),
    .default = NA
  )) %>%
  select(-c(mag, value1, value2))

ic_append <- ic_subset %>%
  select(intensity_category_id, intensity_name, intensity_value, value) %>%
  rename(intensity_cat = intensity_value, 
         intensity = value)

status <- status %>%
  left_join(ic_append, 
            by = c("intensity_category_id", 
                   "intensity_value" = "intensity_cat")) %>%
  select(-intensity_category_id)

Identifying inconsistent phenophase status reports

We wanted to identify when observers provided incompatible status reports for different phenophases. In particular, we identified when observers reported a “no” to flowers but reported a “yes” or “?” to open flowers. Similarly, we identified when observers reported a “no” to fruits but reported a “yes” or “?” to ripe fruits. We also identified when observers reported a “?” to flowers but reported a “yes” to open flowers, and when observers reported a “?” to fruits but reported a “yes” to ripe fruits.

Code
# To look at this, can't have more than one observation of a plant per person
# per day. We've already removed duplicates, but now need to resolve instances 
# where somebody made multiple observations of the same plant on the same date 
# that differed in some way.

  # For now, will keep record with more advanced phenophase or higher 
  # intensity value. Will do this by sorting observations in descending
  # order and keeping only the first
  inddateobsp <- status %>%
    group_by(common_name, individual_id, obsdate, person_id, php) %>%
    summarize(n_obs = n(),
              .groups = "keep") %>%
    data.frame()
  inddateobsp$obsnum <- 1:nrow(inddateobsp)
  
  status <- status %>%
    arrange(person_id, individual_id, obsdate, php, 
            desc(phenophase_status), desc(intensity)) %>%
    left_join(select(inddateobsp, -c(n_obs, common_name)), 
              by = c("person_id", "individual_id", 
                     "obsdate", "php")) %>%
    # Create "dups" column, where dups > 1 indicates that the observation can be
    # removed since there's another observation that same day with more advanced
    # phenology or higher intensity/abundance.
    mutate(dups = sequence(rle(as.character(obsnum))$lengths))
  
  # Remove extra observations and unnecessary columns
  status <- status %>%
    filter(dups == 1) %>%
    select(-c(obsnum, dups)) %>%
    arrange(common_name, obsdate, person_id, php)

# To identify inconsistent status values, will need to put flower/fruit data 
# into wide form (all data for a plant visit in the same row). Removing
# unknown status observations first (<0.5% of fruit/flower observations).
statusw <- status %>%
  filter(php != "leaves") %>%
  filter(phenophase_status != -1) %>%
  select(person_id, partner_group, site_id, state, common_name, individual_id,
         wy, obsdate, php, phenophase_status, intensity) %>%
  rename(status = phenophase_status) %>%
  pivot_wider(names_from = php,
              names_glue = "{php}_{.value}",
              values_from = c(status, intensity)) %>%
  data.frame()

# Identify phenophase status inconsistencies
# NOTE: changing NAs to 999 in order to make this code simpler
statusw <- statusw %>%
  mutate(across(contains("status"), ~replace_na(., 999))) %>%
  # Problem: flower = 0, open = NA or 1
  mutate(flower0_openNot0 = ifelse(flower_status == 0 & open.flower_status != 0, 
                                   1, 0)) %>%
  # Problem: flower = NA, open = 1
  mutate(flowerNA_open1 = ifelse(flower_status == 999 & open.flower_status == 1,
                                 1, 0)) %>%
  # Problem: fruit = 0, ripe = NA or 1
  mutate(fruit0_ripeNot0 = ifelse(fruit_status == 0 & ripe.fruit_status != 0, 
                                   1, 0)) %>%
  # Problem: fruit = NA, ripe = 1
  mutate(fruitNA_ripe1 = ifelse(fruit_status == 999 & ripe.fruit_status == 1,
                                1, 0))

# Table summarizing problems with flower/open flower status
flower_probs <- statusw %>%
  group_by(common_name, wy) %>%
  summarize(n = n(),
            n_flower0 = sum(flower_status == 0),
            n_flower1 = sum(flower_status == 1),
            n_flowerNA = sum(flower_status == 999),
            n_open0 = sum(open.flower_status == 0),
            n_open1 = sum(open.flower_status == 1),
            n_openNA = sum(open.flower_status == 999),
            n_flower0_openNot0 = sum(flower0_openNot0 == 1),
            n_flowerNA_open1 = sum(flowerNA_open1 == 1),
            .groups = "keep") %>%
  filter(n_flower0_openNot0 + n_flowerNA_open1 > 0) %>%
  data.frame()

# Table summarizing problems with fruit/ripe fruit status
fruit_probs <- statusw %>%
  group_by(common_name, wy) %>%
  summarize(n = n(),
            n_fruit0 = sum(fruit_status == 0),
            n_fruit1 = sum(fruit_status == 1),
            n_fruitNA = sum(fruit_status == 999),
            n_ripe0 = sum(ripe.fruit_status == 0),
            n_ripe1 = sum(ripe.fruit_status == 1),
            n_ripeNA = sum(ripe.fruit_status == 999),
            n_fruit0_ripeNot0 = sum(fruit0_ripeNot0 == 1),
            n_fruitNA_ripe1 = sum(fruitNA_ripe1 == 1),
            .groups = "keep") %>%
  filter(n_fruit0_ripeNot0 + n_fruitNA_ripe1 > 0) %>%
  data.frame()
Table 3: Inconsistent reports of flowering phenophase statuses by species and water year
Species Water year No. observations Flowers:no AND Open flowers:yes/? Flowers:? AND Open flowers:yes
American beautyberry 2024 585 0 28
American beautyberry 2025 1106 13 22
American star-thistle 2025 44 1 0
Texas lupine 2025 179 3 0
West Indian shrubverbena 2025 106 4 1
blackeyed Susan 2025 199 2 0
blue mistflower 2025 166 1 1
common buttonbush 2024 300 2 0
common buttonbush 2025 359 1 0
common sunflower 2024 158 1 0
eastern baccharis 2025 83 1 0
eastern purple coneflower 2024 34 1 0
eastern purple coneflower 2025 265 0 1
eastern redbud 2024 462 4 2
eastern redbud 2025 543 5 2
horsetail milkweed 2024 151 1 1
mealycup sage 2025 165 1 0
purple prairie clover 2024 250 0 17
purple prairie clover 2025 176 0 17
red maple 2024 497 1 0
red maple 2025 369 2 1
rubber rabbitbrush 2024 181 3 0
rubber rabbitbrush 2025 188 1 0
wax mallow 2025 366 4 0
Table 4: Inconsistent reports of fruiting phenophase statuses
Species Water year No. observations Fruit:no AND Ripe fruit:yes/? Fruit:? AND Ripe fruit:yes
American beautyberry 2025 1106 2 0
American star-thistle 2025 44 1 0
Texas lupine 2025 179 2 0
West Indian shrubverbena 2025 106 3 0
blackeyed Susan 2024 1 1 0
blackeyed Susan 2025 199 12 0
blue mistflower 2025 166 5 1
common buttonbush 2024 300 1 0
common buttonbush 2025 359 1 0
common milkweed 2025 15 1 0
common sunflower 2024 158 4 0
common sunflower 2025 201 1 0
eastern baccharis 2024 114 1 0
eastern purple coneflower 2025 265 1 1
eastern redbud 2024 462 1 0
eastern redbud 2025 543 14 0
firewheel 2025 117 5 1
green antelopehorn 2025 199 1 0
horsetail milkweed 2024 151 1 0
horsetail milkweed 2025 145 2 0
lemon beebalm 2025 10 1 0
mealycup sage 2025 165 0 1
purple passionflower 2025 27 1 0
rubber rabbitbrush 2025 188 2 0
spider milkweed 2025 57 1 0
turkey tangle fogfruit 2025 87 2 0
wax mallow 2025 366 2 0
white crownbeard 2025 250 2 0
wild bergamot 2025 136 6 1

Summary of sites monitored

Code
# Plot locations where flowering/fruiting or milkweed leaves observed in current
# water year (Oct 2024 to present)
status <- status %>%
  mutate(ind_date = paste0(individual_id, "_", obsdate)) 

locs <- status %>%
  filter(wy == 2025) %>%
  group_by(site_id, lat, lon, state) %>%
  summarize(nspp = n_distinct(common_name),
            nplants = n_distinct(individual_id),
            nobservers = n_distinct(person_id),
            # nobs: Number of observations, where an observations is all
            # data (all phenophases) submitted for a plant on given date
            nobs = n_distinct(ind_date), 
            .groups = "keep") %>%
  data.frame()

locs_by_state_int <- locs %>%
  group_by(state) %>%
  summarize(nspp_per_site = round(mean(nspp), 2),
            nplants_per_site = round(mean(nplants), 2),
            nobs_per_site = round(mean(nobs),2)) %>%
  data.frame()

locs_by_state <- status %>%
  filter(wy == 2025) %>%
  group_by(state) %>%
  summarize(nsites = n_distinct(site_id),
            nspp = n_distinct(common_name),
            nplants = n_distinct(individual_id),
            nobs = n_distinct(ind_date)) %>%
  data.frame() %>%
  left_join(locs_by_state_int, by = "state")
Figure 1: Locations of sites where priority species were monitored between October 2024 and present.
Figure 2: Locations of sites in Louisiana where priority species were monitored between October 2024 and present.
Figure 3: Locations of sites in New Mexico where priority species were monitored between October 2024 and present.
Figure 4: Locations of sites in Oklahoma where priority species were monitored between October 2024 and present.
Figure 5: Locations of sites in Texas where priority species were monitored between October 2024 and present.
Table 5: Summary of sites monitored in each state from October 2024 to present. Here, an observation is all data collected for a plant on a given date.
State No. sites No. species No. plants No. observations Mean no. species per site Mean no. plants per site Mean no. of observations per site
LA 15 10 63 869 2.47 4.20 57.93
NM 6 7 15 559 1.83 2.50 93.17
OK 11 8 35 398 1.45 3.18 36.18
TX 144 36 426 4336 2.69 2.96 30.11

Summary of species monitored

Table 6: Summary of plants monitored from October 2024 to present. Here, an observation is all data collected for a plant on a given date.
Species Functional group No. plants: LA No. plants: NM No. plants: OK No. plants: TX Total no. plants No. sites Mean no. observations
American beautyberry Deciduous broadleaf 11 0 0 70 81 64 13.5
eastern redbud Deciduous broadleaf 2 1 1 35 39 35 13.6
wax mallow Deciduous broadleaf 0 0 0 30 30 28 12.2
red maple Deciduous broadleaf 25 0 0 2 27 12 13.3
common buttonbush Deciduous broadleaf 11 0 0 13 24 20 13.6
West Indian shrubverbena Deciduous broadleaf 0 0 0 9 9 8 11.8
eastern baccharis Deciduous broadleaf 6 0 0 0 6 4 13.7
trumpet honeysuckle Deciduous broadleaf 0 0 0 4 4 4 18.8
Goodding’s willow Deciduous broadleaf 0 1 0 0 1 1 2.0
black elderberry Deciduous broadleaf 1 0 0 0 1 1 1.0
rubber rabbitbrush Drought deciduous broadleaf 0 4 0 0 4 2 43.8
blackeyed Susan Forb 0 0 10 17 27 21 7.3
mealycup sage Forb 0 0 0 24 24 20 6.9
Texas lupine Forb 0 0 0 22 22 21 8.1
blue mistflower Forb 0 0 0 22 22 22 7.5
eastern purple coneflower Forb 0 0 0 20 20 16 13.2
green antelopehorn Forb 0 0 2 17 19 18 10.5
white crownbeard Forb 3 0 0 15 18 17 13.8
common sunflower Forb 0 2 3 9 14 12 14.0
wild bergamot Forb 1 0 0 13 14 13 9.7
butterfly milkweed Forb 0 0 0 13 13 13 6.5
firewheel Forb 0 0 0 12 12 11 9.7
palmleaf thoroughwort Forb 0 0 0 11 11 11 9.4
spider milkweed Forb 0 0 0 11 11 7 5.2
lanceleaf tickseed Forb 0 0 8 1 9 5 5.9
purple passionflower Forb 1 0 0 6 7 7 3.9
purple prairie clover Forb 0 0 7 0 7 1 25.9
American star-thistle Forb 0 0 0 5 5 5 8.4
button eryngo Forb 2 0 3 0 5 2 10.4
Canada goldenrod Forb 0 0 0 4 4 4 8.2
lemon beebalm Forb 0 0 0 4 4 4 2.5
upright prairie coneflower Forb 0 0 1 3 4 4 6.8
horsetail milkweed Forb 0 3 0 0 3 2 43.3
showy milkweed Forb 0 2 0 1 3 3 30.0
aquatic milkweed Forb 0 0 0 2 2 2 21.0
broadleaf milkweed Forb 0 2 0 0 2 1 41.0
common milkweed Forb 0 0 0 2 2 2 7.5
golden crownbeard Forb 0 0 0 2 2 2 1.0
Texas thistle Forb 0 0 0 1 1 1 10.0
cardinalflower Forb 0 0 0 1 1 1 1.0
swamp milkweed Forb 0 0 0 1 1 1 1.0
turkey tangle fogfruit Semi-evergreen forb 0 0 0 12 12 12 7.1
straggler daisy Semi-evergreen forb 0 0 0 11 11 11 3.5
seaside goldenrod Semi-evergreen forb 0 0 0 1 1 1 15.0

Summary of observers

Table 7: Summary of people who submitted data for priority species since October 2024, by state. Values in table are means, with range of values in parentheses.
State No. observers No. species per observer No. plants per observer No. plants per species
LA 23 3.26 (1 - 6) 7.43 (1 - 21) 2.34 (1 - 7)
NM 22 1.91 (1 - 5) 2.5 (1 - 8) 1.14 (1 - 1.6)
OK 12 1.42 (1 - 6) 6.92 (1 - 27) 5.04 (1 - 6)
TX 123 3.14 (1 - 23) 3.89 (1 - 23) 1.44 (1 - 5)


Figure 6: Number of species each observer monitored since October 2024.


Figure 7: Number of plants per species that observers monitored since October 2024

Observation frequency: Reporting a “no” prior to the first “yes”

To evaluate observation frequency, we downloaded individual phenometrics data for priority species from 1 October 2023 through the present day.

Code
# Download individual phenometrics data
ip_filename <- "data/ttr-ipdata-2024sep2025.csv"

if (!file.exists(ip_filename) | update_data == TRUE) {
  
  # Download flowering, fruiting data for Oct 2024 - April 2025 and Oct 2023 - 
  # Sep 2024 (phenometrics aggregated over water year).
  # After some experimenting, it looks like when requesting data by water year, 
  # the years argument corresponds to the start of each year in October. So we
  # need to use years = 2023:2024 for this call.
  ip_dl <- npn_download_individual_phenometrics(
    request_source = "erinz",
    years = 2023:2024,
    period_start = "10-01",
    period_end = "09-30",
    species_ids = spp_list$species_id,
    phenophase_ids= phenophases,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  ip_dl <- data.frame(ip_dl)
  
  # Download leafing data for milkweeds in 2024-2025
  milkweeds <- spp_list %>%
    filter(str_detect(common_name, "milkweed")) %>%
    pull(species_id)
  ip_mwleaf_dl <- npn_download_individual_phenometrics(
    request_source = "erinz",
    years = 2023:2024,
    period_start = "10-01",
    period_end = "09-30",
    species_ids = milkweeds,
    phenophase_ids= 488,
    states = states4,
    additional_fields = c("observedby_person_id",
                          "partner_group",
                          "site_name", 
                          "species_functional_type"))
  ip_mwleaf_dl <- data.frame(ip_mwleaf_dl)
  
  # Combine everything and format
  ip_df <- rbind(ip_dl, ip_mwleaf_dl) %>%
    mutate(php = case_when(
      phenophase_id == 500 ~ "flower",
      phenophase_id == 501 ~ "open flower",
      phenophase_id == 516 ~ "fruit",
      phenophase_id == 390 ~ "ripe fruit",
      phenophase_id == 488 ~ "leaves")) %>%
    select(-c(observedby_person_id, elevation_in_meters, genus, species, kingdom,
              phenophase_description, first_yes_month, first_yes_day,
              first_yes_julian_date, last_yes_year, last_yes_month, 
              last_yes_day, last_yes_julian_date, numdays_until_next_no)) %>%
    rename(func_type = species_functional_type,
           lat = latitude,
           lon = longitude,
           prior_no = numdays_since_prior_no)
  
  # Some observations missing state ID. Will use a shapefile to get an assigned
  # state for each site and use that moving forward
  state_fill <- ip_df %>%
    select(site_id, lon, lat, state) %>%
    distinct()
  state_fillv <- vect(state_fill, 
                      geom = c("lon", "lat"), 
                      crs = "epsg:4326")
  state_new <- terra::extract(states, state_fillv)
  state_fill <- cbind(state_fill, state_new = state_new$STUSPS)
  # check:
  # count(state_fill, state, state_new) %>%
  #   mutate(same = ifelse(state == state_new, 1, 0)) %>%
  #   arrange(same)
  
  # Attach new state labels and exclude observations that aren't in the 4 states
  ip_df <- ip_df %>%
    left_join(select(state_fill, site_id, state_new), by = "site_id") %>%
    select(-state) %>%
    rename(state = state_new) %>%
    filter(!is.na(state) & state %in% states4)

  # Write to file
  write.csv(ip_df, ip_filename, row.names = FALSE)
  # Remove objects
  rm(ip_df, ip_dl, ip_mwleaf_dl)
}

ip <- read.csv(ip_filename)

For these summaries, we evaluated observation frequency by classifying flowering and fruiting observations based on the time elapsed between a “no” observation and the first “yes” observation of a given phenophase. Here, we compared first yeses that were submitted between 1 November 2024 and the present (~2025 water year) with first yeses that were submitted between 1 November 2023 and 30 September 2024 (~2024 water year).

Code
ip <- ip %>%
  # Create "season" (Nov 2024 - Sept 2025 = 2025)
  mutate(yes_date = parse_date_time(paste(first_yes_year, first_yes_doy),
                                    orders = "Y j"),
         yes_month = month(yes_date),
         season = case_when(
           yes_month %in% 11:12 ~ first_yes_year + 1,
           yes_month %in% 1:9 ~ first_yes_year,
           .default = NA))
# check
# count(ip, first_yes_year, yes_month, season)

# Proportion of first yeses that are preceded by a prior no within X days, 
# by phenophase and season
obsfreq_php <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  mutate(php = factor(php, levels = c("flower", "open flower", "fruit", 
                                      "ripe fruit"))) %>%
  group_by(season, php) %>%
  summarize(nobs = n(),
            nobs3 = sum(!is.na(prior_no) & prior_no <= 3),
            nobs7 = sum(!is.na(prior_no) & prior_no <= 7),
            nobs14 = sum(!is.na(prior_no) & prior_no <= 14),
            nobs30 = sum(!is.na(prior_no) & prior_no <= 30),
            .groups = "keep") %>%
  mutate(prop3 = round(nobs3/nobs, 2),
         prop7 = round(nobs7/nobs, 2),
         prop14 = round(nobs14/nobs, 2),
         prop30 = round(nobs30/nobs, 2)) %>%
  data.frame()

# Make a bar chart for this (minus milkweed leaf observations because few of them)
obsfreq_phpl <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  mutate(php = factor(php, levels = c("flower", "open flower", 
                                      "fruit", "ripe fruit"))) %>%
  group_by(season, php) %>%
  summarize(nobs03 = sum(!is.na(prior_no) & prior_no %in% 1:3),
            nobs07 = sum(!is.na(prior_no) & prior_no %in% 4:7),
            nobs14 = sum(!is.na(prior_no) & prior_no %in% 8:14),
            nobs30 = sum(!is.na(prior_no) & prior_no %in% 15:30),
            nobs99 = sum((!is.na(prior_no) & prior_no > 30) | is.na(prior_no)),
            .groups = "keep") %>%
  pivot_longer(cols = nobs03:nobs99,
               names_to = "cat",
               values_to = "nobs") %>%
  data.frame()

freq_by_php <- ggplot(obsfreq_phpl) +
  geom_bar(aes(x = php, y = nobs, fill = cat),
           position = "stack",
           stat = "identity") +
  scale_fill_manual(values = c("#7fc97f", "#beaed4", "#fdc086",
                               "#ffff99", "#386cb0"),
                    labels = c("1-3 days", "4-7 days", "8-14 days",
                               "15-30 days", ">30 days")) +
  facet_grid( ~ season) +
  labs(x = "Phenophase", y = 'Number of first "yes" observations',
       fill = 'Prior "no"') +
  theme_bw()

# Proportion of first yeses that are preceded by a prior no within X days, 
# by species and season
obsfreq_spp <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  group_by(common_name, season) %>%
  summarize(nobs = n(),
            nobs3 = sum(!is.na(prior_no) & prior_no <= 3),
            nobs7 = sum(!is.na(prior_no) & prior_no <= 7),
            nobs14 = sum(!is.na(prior_no) & prior_no <= 14),
            nobs30 = sum(!is.na(prior_no) & prior_no <= 30),
            .groups = "keep") %>%
  mutate(prop3 = round(nobs3/nobs, 2),
         prop7 = round(nobs7/nobs, 2),
         prop14 = round(nobs14/nobs, 2),
         prop30 = round(nobs30/nobs, 2)) %>%
  data.frame()

# Make a bar chart for this
# Only using species that had at least 20 first yeses in one of the years:
spp10 <- unique(obsfreq_spp$common_name[obsfreq_spp$nobs >= 20])
obsfreq_sppl <- ip %>%
  filter(!is.na(season)) %>%
  filter(php != "leaves") %>%
  filter(common_name %in% spp10) %>%
  mutate(common_name = factor(common_name)) %>%
  group_by(season, common_name) %>%
  summarize(nobs03 = sum(!is.na(prior_no) & prior_no %in% 1:3),
            nobs07 = sum(!is.na(prior_no) & prior_no > 3 & prior_no < 8),
            nobs14 = sum(!is.na(prior_no) & prior_no %in% 8:14),
            nobs30 = sum(!is.na(prior_no) & prior_no %in% 15:30),
            nobs99 = sum((!is.na(prior_no) & prior_no > 30) | is.na(prior_no)),
            .groups = "keep") %>%
  pivot_longer(cols = nobs03:nobs99,
               names_to = "cat",
               values_to = "nobs") %>%
  data.frame()

freq_by_spp <- ggplot(obsfreq_sppl) +
  geom_bar(aes(x = common_name, y = nobs, fill = cat),
           position = "stack",
           stat = "identity") +
  scale_fill_manual(values = c("#7fc97f", "#beaed4", "#fdc086",
                               "#ffff99", "#386cb0"),
                    labels = c("1-3 days", "4-7 days", "8-14 days",
                               "15-30 days", ">30 days")) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 15)) +
  facet_grid(rows = vars(season)) +
  labs(x = "Species", y = 'Number of first "yes" observations',
       fill = 'Prior "no"') +
  theme_bw() +
  theme(legend.position = "bottom", 
        axis.text.x = element_text(angle = 45, vjust=1, hjust=1))


Figure 8: Proportion of first “yes” observations that were associated with a prior “no” with a given number of days, grouped by phenophase. 2024 = November 2023 - September 2024. 2025 = November 2024 - present.


Figure 9: Proportion of first “yes” observations that were associated with a prior “no” with a given number of days, grouped by species. We excluded species with fewer than 10 “yes” observations in both water years. 2024 = November 2023 - September 2024. 2025 = November 2024 - present.

Plant information

We also extracted information that observers provided about individual plants and summarized this information by species.

Code
ids <- unique(status$individual_id)

# Will download in groups of 100 to avoid errors
groups <- ceiling(length(ids)/100)

for (i in 0:(groups - 1)) {
  index <- i*100 + 1:100
  ids_subset <- ids[index]
  
  idsl <- 1:length(ids_subset) - 1
  ids_call <- paste0("individual_id[", idsl, "]=", ids_subset, "&", collapse = "")
  ids_call <- str_sub(ids_call, end = -2)
  
  url_base <- "https://services.usanpn.org/npn_portal/individuals/getPlantDetails.json?"
  url <- paste0(url_base, ids_call)
  
  # Get information about each plant from API
  json_data <- read_json(
    path = url, 
    simplifyVector = TRUE
  )
  df_temp <- json_data %>%
    select(-c(comment, plant_image_url, plant_image_upload_date))
  
  if (i == 0) {
    df <- df_temp
  } else {
    df <- rbind(df, df_temp)
  }
  
  # Remove any duplicate rows (for some reason there are some)
  df <- distinct(df)
}

# Append information about species to the plant data
status <- status %>%
  left_join(select(nn_spp, species_id, common_name, scientific_name), 
            by = c("common_name", "species_id"))
df <- df %>%
  left_join(distinct(status, individual_id, common_name), by = "individual_id")

# Check that all species in status-intensity data appear in plant info df
# status$individual_id[!status$individual_id %in% df$individual_id]
# df$individual_id[!df$individual_id %in% status$individual_id]
# ok

# Check that there's only one row per individual
# length(unique(df$individual_id)); length(ids)

# Look at variable structures
# count(df, patch) # some yes, rest are blank
# count(df, patch_size) # some with value, most blank
# count(df, shade_status) # lots blank
# count(df, wild) # 0 or 1 or blank (lots)
# count(df, watered) # 0 or 1 or blank (lots)
# count(df, fertilized) # 0 or 1 or blank (lots)
# count(df, gender) # Female or Both or blank (lots)
# count(df, planting_date) # most blank or "--", rest with mix of year/month or exact date
# count(df, death_date_observed) # Only few values
# count(df, last_date_observed_alive) # Only fe values
# count(df, death_reason) # Few Unknown, rest blank

# Summarize by species
spp <- df %>%
  group_by(common_name) %>%
  summarize(n_plants = n(),
            patch_yes = sum(patch == 1),
            patch_with_size = sum(patch_size != ""),
            shade_reported = sum(shade_status != ""),
            wild_yes = sum(wild == 1),
            wild_no = sum(wild == 0),
            wild_unk = sum(wild == ""),
            water_yes = sum(watered == 1),
            water_no = sum(watered == 0),
            water_unk = sum(watered == ""),
            fert_yes = sum(fertilized == 1),
            fert_no = sum(fertilized == 0),
            fert_unk = sum(fertilized == ""),
            gender_f = sum(gender == "Female"),
            gender_b = sum(gender == "Both"),
            gender_unk = sum(gender == ""),
            planting_date = sum(planting_date %in% c("", "--")),
            death_date = sum(death_date_observed != "")) %>%
  data.frame()
  

allplants <- data.frame(
  common_name = "All plants",
  n_plants = nrow(df),
  patch_yes = sum(df$patch == 1),
  patch_with_size = sum(df$patch_size != ""),
  shade_reported = sum(df$shade_status != ""),
  wild_yes = sum(df$wild == 1),
  wild_no = sum(df$wild == 0),
  wild_unk = sum(df$wild == ""),
  water_yes = sum(df$watered == 1),
  water_no = sum(df$watered == 0),
  water_unk = sum(df$watered == ""),
  fert_yes = sum(df$fertilized == 1),
  fert_no = sum(df$fertilized == 0),
  fert_unk = sum(df$fertilized == ""),
  gender_f = sum(df$gender == "Female"),
  gender_b = sum(df$gender == "Both"),
  gender_unk = sum(df$gender == ""),
  planting_date = sum(df$planting_date %in% c("", "--")),
  death_date = sum(df$death_date_observed != "")
)

plant_info <- rbind(spp, allplants)

# Extract most useful information
plant_info2 <- plant_info %>%
  select(-c(shade_reported, gender_f, gender_b, gender_unk)) %>%
  mutate(patch = paste0(patch_yes, " (", patch_with_size, ")"),
         wild = paste0(wild_yes, "/", wild_no, "/", wild_unk),
         water = paste0(water_yes, "/", water_no, "/", water_unk),
         fertilized = paste0(fert_yes, "/", fert_no, "/", fert_unk)) %>%
  select(-c(patch_yes, patch_with_size, wild_yes, wild_no, wild_unk,
            water_yes, water_no, water_unk, fert_yes, fert_no, fert_unk))
Table 8: Summary of information observers reported about plants that have been monitored since October 2023, by species. Planting date and Death date represent the number of plants where observers provided the date of planting or the date a plant died (if relevant). Patch indicates how many of these “individuals” were monitered as a patch (i.e, multiple plants), and for those that were monitored as patches, the number of instances where the observer noted the approximate size of the patch (in square feet). Note that for the patch information, there were no data to differentiate a “no” response from unreported.
Species No. plants Planting date Death date Patch (size) Wild (Y/N/Unreported) Watered (Y/N/Unreported) Fertilized (Y/N/Unreported)
American beautyberry 86 72 0 4 (4) 21/24/41 15/27/44 2/38/46
American star-thistle 5 5 0 1 (0) 1/0/4 1/0/4 0/1/4
Canada goldenrod 4 4 0 0 (0) 1/0/3 1/0/3 0/1/3
Goodding’s willow 1 1 0 0 (0) 1/0/0 0/1/0 0/1/0
Texas lupine 22 16 0 6 (5) 6/6/10 4/7/11 0/11/11
Texas thistle 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
West Indian shrubverbena 9 4 0 3 (1) 1/6/2 6/0/3 1/4/4
aquatic milkweed 2 2 0 1 (1) 0/1/1 1/0/1 0/1/1
black elderberry 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
blackeyed Susan 27 22 0 8 (3) 0/5/22 4/1/22 0/5/22
blue mistflower 22 19 0 3 (3) 0/4/18 3/1/18 1/3/18
broadleaf milkweed 2 2 0 2 (2) 2/0/0 0/2/0 0/2/0
butterfly milkweed 13 12 0 1 (1) 0/2/11 2/0/11 0/2/11
button eryngo 6 5 0 0 (0) 0/3/3 0/3/3 1/2/3
cardinalflower 2 2 0 0 (0) 0/1/1 1/0/1 0/1/1
common buttonbush 30 26 0 0 (0) 6/11/13 4/13/13 1/15/14
common milkweed 2 2 0 0 (0) 0/0/2 0/0/2 0/0/2
common sunflower 17 16 0 8 (4) 6/2/9 3/5/9 0/6/11
eastern baccharis 7 6 0 0 (0) 3/0/4 0/3/4 0/3/4
eastern purple coneflower 24 13 0 3 (3) 1/13/10 15/0/9 3/12/9
eastern redbud 53 40 0 0 (0) 10/17/26 12/19/22 2/27/24
firewheel 13 12 0 4 (3) 3/3/7 3/4/6 1/6/6
golden crownbeard 2 1 0 0 (0) 0/1/1 1/0/1 0/1/1
green antelopehorn 20 15 1 5 (5) 5/5/10 5/5/10 0/10/10
horsetail milkweed 3 3 0 2 (2) 3/0/0 0/3/0 0/3/0
lanceleaf tickseed 9 9 1 7 (0) 0/0/9 0/0/9 0/0/9
lemon beebalm 4 3 0 1 (1) 1/1/2 1/1/2 1/1/2
mealycup sage 24 12 0 4 (4) 0/15/9 12/3/9 3/12/9
palmleaf thoroughwort 11 6 0 4 (4) 1/7/3 9/0/2 1/8/2
purple passionflower 8 4 0 2 (2) 0/4/4 3/2/3 0/5/3
purple prairie clover 7 7 0 0 (0) 0/0/7 0/0/7 0/0/7
red maple 28 22 1 0 (0) 7/6/15 4/10/14 0/14/14
rubber rabbitbrush 5 4 0 1 (1) 3/1/1 0/4/1 1/2/2
seaside goldenrod 1 1 0 1 (1) 0/1/0 1/0/0 0/1/0
showy milkweed 5 4 0 2 (2) 2/2/1 2/2/1 0/3/2
spider milkweed 11 11 0 2 (2) 6/0/5 0/6/5 0/6/5
straggler daisy 11 11 0 4 (3) 4/0/7 0/4/7 0/4/7
swamp milkweed 1 1 0 0 (0) 0/0/1 0/0/1 0/0/1
tall blazing star 1 1 0 0 (0) 0/1/0 1/0/0 0/1/0
trumpet honeysuckle 4 1 0 1 (0) 0/3/1 3/0/1 0/2/2
turkey tangle fogfruit 12 11 0 2 (2) 2/1/9 2/1/9 0/3/9
upright prairie coneflower 4 4 0 1 (0) 0/0/4 0/0/4 0/0/4
wax mallow 30 17 0 10 (9) 3/13/14 8/9/13 1/17/12
white crownbeard 18 16 0 7 (5) 9/3/6 3/7/8 0/10/8
wild bergamot 15 11 0 7 (7) 2/7/6 7/4/4 0/9/6
All plants 583 458 3 107 (80) 110/169/304 137/147/299 19/253/311